#######################################################################

#######################################################################

## (c) Ramya Parthasarathy

## This replciation file generates the figures and tables included in ``Deliberative Democracy in an Unequal World: A Text-as-data study of South India's village assemblies''


######################################################################
######################################################################

rm(list = ls())

## Install packages (see below)
## Load Packages
pkgs <- c("xtable", "sandwich", "dplyr", "gdata",
  "ggplot2", "foreign", "gtools", "lmtest", "reshape2", "lfe", 
  "stargazer", "stm", "tm", "SnowballC", "wordcloud")

#install.packages(pkgs)
sapply(pkgs, require, character.only=TRUE)
sessionInfo()

#######################################################################
#######################################################################

## Load data from Working Directory
setwd("/Users/ramyaparthasarathy/Dropbox/Women_Delib/Submissions/APSR/Revision/Final_Submission/Replication_files")
load("GS_data2.Rda")

#######################################################################
#######################################################################
## Subset corpuses by treatment status
## Control corpus
df_c <- subset(df_new, treat == 0)
dim(df_c)

table(df_c$position_new)
df_c$position_new <- as.character(df_c$position_new)
df_c$position_new <- ifelse(df_c$position_new=="PVP", "citizen", 
	df_c$position_new)
df_c$position_new <- as.factor(df_c$position_new)
table(df_c$position_new)
class(df_c$position_new)

## Treatment corpus
df_t <- subset(df_new, treat==1)
dim(df_t)

#####################################################################
#####################################################################
## TABLE 1
#####################################################################
#####################################################################
## Control Descriptives
unique(df_c$DIST)
unique(df_c$GP)

names(df_c)

sum_stats1 <- summarise(df_c, 
	ndocs = n(), 
	total_attendance = mean(tot_att),
	perc_female = mean(female),
	perc_citizen = mean(citizen),
	perc_admin = mean(admin),
	perc_elected = mean(pol),
	avg_length = mean(speechlength)
	)

summary_stats_1 <- sum_stats1 %>%
  summarise_each(funs(mean(., na.rm = TRUE), 
  	sd(., na.rm = TRUE), 
  	median(., na.rm = TRUE), 
  	min(., na.rm = TRUE),
  	max(., na.rm = TRUE)), 
  	total_attendance, ndocs, avg_length, 
  	perc_female, perc_citizen, perc_admin, perc_elected)

summary_stats <- matrix(summary_stats_1, ncol=5, byrow=F)
summary_stats <- as.data.frame(summary_stats)
colnames(summary_stats) <- c("Mean", "Std. Dev.", "Median", 
	"Min", "Max.")
rownames(summary_stats) <- c("Total Attendance" ,"Number of Speeches", 
	"Speech Length", "Percent Female", "Percent Citizen", 
	"Percent Admin", "Percent Politician")
summary_stats

cat(print(xtable(summary_stats, align="lccccc", 
		caption=("Village-Level Summary Statistics"), 
		label=("doc_stats1")), digits=3, include.colnames=T,
		include.rownames=T, caption.placement="top", 
		size="footnotesize"), 
	file="control_doc_stats.tex")

#######################################################################
#######################################################################
## Pre-process control corpus of documents
#######################################################################
#######################################################################

temp <- textProcessor(documents=df_c$cleaned_content, 
	metadata=df_c,  removestopwords=TRUE, removenumbers=TRUE,
	removepunctuation=TRUE, stem=TRUE)

## Plot document removal across a range of lower threshold values
plotRemoved(temp$documents, lower.thresh = seq(1, 30, by = 1))

## Prep documents
out <- prepDocuments(temp$documents, temp$vocab, temp$meta, 
	lower.thresh=20, upper.thres=Inf)
GScont.docs <- out$documents
GScont.meta <- out$meta
GScont.vocab <- out$vocab

## Resulting documents
length(GScont.docs)
length(GScont.vocab)

## Choose K topics based on coherence and exclusivity
set.seed(1992)
storage <- searchK(GScont.docs, GScont.vocab, 
	K = c(seq(10, 50, by=5)),
	prevalence = ~ female + factor(position_new) + fem_pres + sc_pres, 
	max.em.its = 75,
	data = GScont.meta)

K_models <- as.data.frame(storage$results)

png("K_models_control.png",
	units="in", height=5, width=7, res=240)
	plot.searchK(storage)
dev.off()


## Then choose model parameters
K <- 15
set.seed(1992)
cont_only15 <- stm(GScont.docs, GScont.vocab, K=K, 
	prevalence = ~ female + factor(position_new) + fem_pres + sc_pres,
#	content = ~ female,
	data=GScont.meta,
	init.type="LDA")

checkResiduals(cont_only15, GScont.docs, tol=0.01)

checkBeta(cont_only15, tolerance = 0.01)


#######################################################################
#######################################################################
## FIGURE 1
#######################################################################
#######################################################################
thoughts <- findThoughts(cont_only15, 
	texts=as.character(GScont.meta$raw_content), 
	topics=c(1:K), n=50)$docs

sink("Control_15.txt")
print(thoughts)
sink()

sink("Control_15_words.txt")
print(summary(cont_only15))
sink()

topic_title_var <- c(
	"funds", 
	"sanitation", 
	"resolutions",
	"greetings", 
	"complaints", 
	"services", 
	"water", 
	"toilet",
	"SHG",
	"education",
	"lists",
	"PVP", 
	"greenhouse",
	"ration", 
	"housing")

topic_titles <- c(
	"Allocation of Funds", 
	"Maintenance of Public Goods", 
	"Announcements, Resolutions, and Voter's Pledge",
	"Greetings and Thanks", 
	"Employment & Wages", 
	"Service Failures", 
	"Water", 
	"Toilet Construction",
	"SHGs",
	"Education",
	"Beneficiary & Voter Lists",
	"Intro to PVP", 
	"Environmental Protection",
	"Ration Shop", 
	"Housing and Land Titles")

## Distribution of Topics Across Corupus
png("Control_topics15.png",
	units="in", height=6, width=11, res=240)
	par(mfrow=c(1,1))
	plot.STM(cont_only15, type="summary", text.cex=1.35, 
		custom.labels=topic_titles, xlim=c(0, 0.2), 
		main="")
dev.off()


#######################################################################
#######################################################################
## TABLE 2
#######################################################################
#######################################################################
word_table <- matrix(rep(NA, 30),nrow=30, ncol=2)

for(i in 1:K){
	word_table[i*2-1, 1] <- topic_titles[i]
	high_prob <- paste(labelTopics(cont_only15)$prob[i,], 
		sep=", ", collapse=", ")
	frex <- paste(labelTopics(cont_only15)$frex[i,], 
		sep=", ", collapse=", ")
	word_table[i*2-1,2] <- paste("Highest Prob: ", high_prob, sep="")
	word_table[i*2, 2] <- paste("FREX: ", frex, sep="")
}
colnames(word_table) <- c("Topic", "Top Words")


cat(print(xtable(word_table, align="c|p{4cm}|p{11cm}|",
	caption="Top Words by Topic", label="topicWords"),
	include.rownames=F, size="footnotesize", caption.placement="top"), 
	file="Control15_words.tex")


#######################################################################
#######################################################################
## GENERATE OUTCOME MEASURES
#######################################################################
#######################################################################

## Matrix of Topic output
topic_props <- as.data.frame(cont_only15$theta)
colnames(topic_props) <- topic_title_var
head(topic_props)

## Primary topic
max_topic <- apply(cont_only15$theta, 1, which.max)
head(max_topic)

## Secondary Topic
ind <- 2
x <- cont_only15$theta
max2_topic <- rep(NA, nrow(x))
for(i in 1:nrow(x)){
	n <- length(x[i,])
	rank <- sapply(sort(x[i,], index.return=TRUE), `[`, length(x[i,])-ind+1)
	max2_topic[i] <- as.integer(rank[2])
}

## Primary topic
a <- model.matrix(~ as.factor(max_topic) - 1)
topic_label <- paste("topic_",topic_title_var,sep="")
colnames(a) <- topic_label
head(a)

## Secondary Topic
b <- model.matrix(~ as.factor(max2_topic) - 1)
topic_label2 <- paste("topic2_",topic_title_var,sep="")
colnames(b) <- topic_label2
head(b)

## Bind into a single DF
output_df <- cbind(GScont.meta, topic_props, max_topic, max2_topic, a, b)
output_df$max_topic <- topic_title_var[output_df$max_topic]
output_df$max2_topic <- topic_title_var[output_df$max2_topic]
names(output_df)


## Generate vars for the next 5 topics discussed after each speech, and previous topic
for(i in 1:nrow(output_df)){
	## Primary topic for the next 5 speakers + previous 1 speaker
	output_df$next_topic1[i] <- ifelse(output_df$last_speech[i]==1, 
		NA, output_df$max_topic[i+1])
	output_df$next_topic2[i] <- ifelse(output_df$last_speech[i+1]==1, 
		NA, output_df$max_topic[i+2])
	output_df$next_topic3[i] <- ifelse(output_df$last_speech[i+2]==1, 
		NA, output_df$max_topic[i+3])
	output_df$next_topic4[i] <- ifelse(output_df$last_speech[i+3]==1, 
		NA, output_df$max_topic[i+4])
	output_df$next_topic5[i] <- ifelse(output_df$last_speech[i+4]==1, 
		NA, output_df$max_topic[i+5])
	output_df$prev_topic[i] <- ifelse(output_df$first_speech[i]==1, 
		NA, output_df$max_topic[i-1])

	## Secondary topic for the next 5 speakers + previous 1 speaker
	output_df$next2_topic1[i] <- ifelse(output_df$last_speech[i]==1, 
		NA, output_df$max2_topic[i+1])
	output_df$next2_topic2[i] <- ifelse(output_df$last_speech[i+1]==1, 
		NA, output_df$max2_topic[i+2])
	output_df$next2_topic3[i] <- ifelse(output_df$last_speech[i+2]==1, 
		NA, output_df$max2_topic[i+3])
	output_df$next2_topic4[i] <- ifelse(output_df$last_speech[i+3]==1, 
		NA, output_df$max2_topic[i+4])
	output_df$next2_topic5[i] <- ifelse(output_df$last_speech[i+4]==1, 
		NA, output_df$max2_topic[i+5])
	output_df$prev2_topic[i] <- ifelse(output_df$first_speech[i]==1, 
		NA, output_df$max2_topic[i-1])
}

## Generate indicators for whether the next speeches were the same topic
output_df$next1_same <- ifelse(
	output_df$max_topic==output_df$next_topic1 | 
	output_df$max_topic==output_df$next2_topic1 | 
	output_df$max2_topic==output_df$next_topic1 | 
	output_df$max2_topic==output_df$next2_topic1, 1, 0)
output_df$next2_same <- ifelse(
	output_df$max_topic==output_df$next_topic2 | 
	output_df$max_topic==output_df$next2_topic2 | 
	output_df$max2_topic==output_df$next_topic2 |
	output_df$max2_topic==output_df$next2_topic2, 1, 0)
output_df$next3_same <- ifelse(
	output_df$max_topic==output_df$next_topic3 |
	output_df$max_topic==output_df$next2_topic3 | 
	output_df$max2_topic==output_df$next_topic3 | 
	output_df$max2_topic==output_df$next2_topic3, 1, 0)
output_df$next4_same <- ifelse(
	output_df$max_topic==output_df$next_topic4 |
	output_df$max_topic==output_df$next2_topic4 | 
	output_df$max2_topic==output_df$next_topic4 | 
	output_df$max2_topic==output_df$next2_topic4, 1, 0)
output_df$next5_same <- ifelse(
	output_df$max_topic==output_df$next_topic5 |
	output_df$max_topic==output_df$next2_topic5 | 
	output_df$max2_topic==output_df$next_topic5 | 
	output_df$max2_topic==output_df$next2_topic5, 1, 0)

output_df$prop5_same <- with(output_df, (next1_same + next2_same + next3_same + next4_same + next5_same)/5)

output_df$next1_2same <- ifelse(output_df$next1_same & output_df$next2_same, 1, 0)
output_df$next1_3same <- ifelse(output_df$next1_2same & output_df$next3_same, 1, 0)
output_df$next1_4same <- ifelse(output_df$next1_3same & output_df$next4_same, 1, 0)
output_df$next1_5same <- ifelse(output_df$next1_4same & output_df$next5_same, 1, 0)
output_df$sum5_same <- with(output_df, next1_same + next2_same + next3_same + next4_same + next5_same)

output_df$length_same <- output_df$next1_same + output_df$next1_2same + output_df$next1_3same + 
							output_df$next1_4same + output_df$next1_5same

summary(output_df$prop5_same)
summary(output_df$sum5_same)

## Generate indicator for whether the speaker raises a new topic
output_df$new_topic <- ifelse(
	output_df$max_topic != output_df$prev_topic & 
	output_df$max2_topic != output_df$prev_topic & 
	output_df$max_topic != output_df$prev2_topic & 
	output_df$max2_topic != output_df$prev2_topic, 1, 0)

output_df$official_response_same <- 
	ifelse(output_df$official_response == 1 & output_df$next1_same==1, 
		1, ifelse(output_df$official_response==1 & 
			output_df$next1_same==0, 0, NA))

output_df$admin_response_same <- ifelse(output_df$admin_response == 1 
	& output_df$next1_same==1, 1, 
	ifelse(output_df$admin_response==1 & 
		output_df$next1_same==0, 0, NA))

output_df$elected_response_same <- 
	ifelse(output_df$elected_response == 1 & 
		output_df$next1_same==1, 1, ifelse(output_df$elected_response==1 & output_df$next1_same==0, 0, NA))

#####################################################################
#####################################################################
## Save workspace
save.image(file="workspace_cont15.Rdata")
rm(list=ls())

#######################################################################
#######################################################################

load("workspace_cont15.Rdata")
ls()

#####################################################################
#####################################################################

## Summary of outcome variables
summary_stats_2 <- output_df %>%
  summarise_each(funs(mean(., na.rm = TRUE), 
  	sd(., na.rm = TRUE)), 
  	female, citizen, pol, admin, next1_same, prop5_same, length_same, 
  	official_response_same, elected_response_same, admin_response_same)

cbind(t(summary_stats_2)[1:10], t(summary_stats_2)[11:20])


## Correlation coefs on outcome vars
cor(cbind(output_df$next1_same, output_df$prop5_same, output_df$length_same), 
	use="complete.obs")

cor.test(output_df$next1_same, output_df$prop5_same)
cor.test(output_df$next1_same, output_df$length_same)
cor.test(output_df$prop5_same, output_df$length_same)


#####################################################################
#####################################################################
## TABLE 3
#####################################################################
#####################################################################
require('foreign')
fs <- read.dta("merged_facesheets_GPv12.dta")
names(fs)

fs_c <- subset(fs, TreatVar==0)
mean(fs_c$fem_att_start, na.rm=T)
mean(fs_c$tot_att_start, na.rm=T)

fs_c$perc_fem <- fs_c$fem_att_start/fs_c$tot_att_start
mean(fs_c$perc_fem, na.rm=T)

## Topic List from FaceSheet data
## 1 Electricity
## 2 Employment
## 3 Health
## 4 Housing
## 5 Sanitation and Environment
## 6 Toilet
## 7 Roads
## 8 Taxes
## 9 Water
## 10 Women's Issues
## 11 Panchayat Finances
## 12 Childcare
## 13 Ration Shop
## 14 Voter ID Cards
## 15 Education and Schools
## 16 Elderly Care
## 17 Animal Care
## 18 Village Organizations


fs_stats <- summarise(fs_c,
	water=mean(topic9_ag, na.rm=T),
	employment = mean(topic2_ag, na.rm=T),
	housing = mean(topic4_ag, na.rm=T), 
	rationshop = mean(topic13_ag, na.rm=T),
	toilet = mean(topic6_ag, na.rm=T),
	environment = mean(topic5_ag, na.rm=T),
	education = mean(topic15_ag, na.rm=T) + mean(topic12_ag, na.rm=T),
	funding = mean(topic11_ag, na.rm=T) + mean(topic8_ag, na.rm=T),
	women = mean(topic18_ag, na.rm=T) + mean(topic10_ag, na.rm=T)
	)

fs_stats

valid <- as.data.frame(t(fs_stats))[c(1:ncol(fs_stats)),1]
valid <- as.numeric(as.character(valid))



valid2 <- c(
	## Water
	mean(output_df$water),
	## Employment
	mean(output_df$complaints),
	## Housing
	mean(output_df$housing), 
	## Ration Shop
	mean(output_df$ration),
	## Toilets
	mean(output_df$toilet),
	## Sanitation
	mean(output_df$sanitation) + mean(output_df$greenhouse),
	## Education
	mean(output_df$education),
	## Funding
	mean(output_df$funds),
	## Women
	mean(output_df$SHG) + mean(output_df$PVP)
	)

topic_title_var
validation <- cbind(valid2, valid)


colnames(validation) <- c("Transcript Data", "Survey Data")
rownames(validation) <- c("Water", "Wages and Employment", 
	"Housing", "Ration Shop", "Toilets", "Environment and Sanitation", "Education", 
	"Funding", "Women's Issues")

validation

cat(print(xtable(validation, align="lcc", digits=4, 
		label="valid_1",
		include.rownames=T, 
		caption="Validation of Topical Prevalence Using Survey Data"), 
		add.to.row = list(list(9),  
			"\\hline  \\multicolumn{3}{L{10cm}}{\\footnotesize \\textit{Note:}
             This table presents the relative frequency of topics across both our survey and transcript data. Categories collected in the survey data were post-coded by issue area. For transcript data, documents were coded as a mixture of topics. As such, we take the share of all documents associated with that topic.  Direct comparisons across the dataset was not possible for all topics, as there were only a limited set of clear analogues.} \\\\"),
		caption.placement="top", 
		size = "footnotesize",
		latex.environments=NULL), 
	file="valid_control.tex")


#####################################################################
#####################################################################
## FIGURE 2
#####################################################################
#####################################################################

## Validation by examining proforma topics
proforma <- c(1, 3, 4)

prep_model2 <- estimateEffect(proforma ~ citizen, cont_only15, metadata=GScont.meta, 
	uncertainty="None")

png("officialValid_control.png", 
	units="in", width=6, height=4, res=240)
par(mfrow=c(1,1))
plot.estimateEffect(prep_model2, "citizen", model=cont_only15, 
	method="difference", labeltype="custom",
	cov.value1=0, cov.value2=1, 
	xlab="Citizen                              Officials",
	#main=NULL,
	xlim=c(-0.05, 0.05),
	ci.level=0.95, width=150,
	verbose.labels=F, custom.labels=topic_titles[proforma])
dev.off()

#####################################################################
#####################################################################
## Citizens speak more than administrators

a <- t.test(df_c$citizen==0, df_c$citizen==1) ## All speakers
b <- t.test(df_c[df_c$female==0,]$citizen==0, 
	df_c[df_c$female==0,]$citizen==1) ## Men only
c <- t.test(df_c[df_c$female==1,]$citizen==0, 
	df_c[df_c$female==1,]$citizen==1) ## Female only

t_df1 <- t(matrix(c(a$estimate, a$statistic, a$p.value,
	b$estimate, b$statistic, b$p.value,
	c$estimate, c$statistic, c$p.value), nrow=4))

rownames(t_df1) <- c("All Speakers", "Men Only", 
	"Women Only")
colnames(t_df1) <- c("Mean, Official Speeches", "Mean, Citizen Speeches", "t-statistic", "p value")
t_df1

cat(print(xtable(t_df1, align="lcccc", digits=4, 
		label="speeches_position",
		caption="Frequency of Speeches, by Position",
	    #floating="FALSE", latex.environments=NULL,
		include.rownames=T), caption.placement="top", 
		size = "footnotesize"), 
	file="speeches_position.tex")

#####################################################################
#####################################################################
## TABLE 4
#####################################################################
#####################################################################

names(fs_c)
voters <- subset(fs_c, select=c("GPID_s", "total_voters", 
	"fem_voters",	"male_voters"))

df_c1 <- merge(df_c, voters, by="GPID_s", all.x=T)
dim(df_c)

grouped <- group_by(df_c1[df_c1$citizen==1,], GPID_s)
speech_pos <- summarise(grouped,
	total = n(),
	female = sum(female),
	#n_govt_fem = mean(n_govt_fem),
	fem_att_perc = mean(fem_att)/mean(tot_att),
	male_att_perc = mean(male_att)/mean(tot_att),
	fem_vote_perc = mean(fem_voters)/mean(total_voters),
	male_vote_perc = mean(male_voters)/mean(total_voters)
	)

speech_pos$fem_perc <- speech_pos$female / speech_pos$total
speech_pos$male_perc <- 1 - speech_pos$fem_perc

speech_pos$male_norm <- speech_pos$male_perc / speech_pos$male_att_perc
speech_pos$fem_norm <- speech_pos$fem_perc / speech_pos$fem_att_perc

speech_pos$male_norm2 <- speech_pos$male_perc / speech_pos$male_vote_perc
speech_pos$fem_norm2 <- speech_pos$fem_perc / speech_pos$fem_vote_perc

a <- t.test(speech_pos$male_perc, speech_pos$fem_perc)
b <- t.test(speech_pos$male_norm, speech_pos$fem_norm)
c <- t.test(speech_pos$male_norm2, speech_pos$fem_norm2)

t_df <- t(matrix(c(a$estimate, a$statistic, a$p.value,
	b$estimate, b$statistic, b$p.value,
	c$estimate, c$statistic, c$p.value), nrow=4))

rownames(t_df) <- c("Raw Differences", "Normalized by Attendance Share", 
	"Normlized by Population Share")
colnames(t_df) <- c("Mean, Male Speeches", "Mean, Female Speeches", "t-statistic", "p value")
t_df

cat(print(xtable(t_df, align="lcccc", digits=4, 
		label="speeches_gender",
		caption="Frequency of Citizen Speeches, by Gender",
	    #floating="FALSE", latex.environments=NULL,
		include.rownames=T), caption.placement="top", 
		size = "footnotesize"), 
	file="speeches_gender2.tex")

#####################################################################
#####################################################################
## TABLE 5
#####################################################################
#####################################################################

e <- felm(female ~ fem_pres + female_literacy + fem_att | 
	DIST | 0 | DIST, 
	data=output_df[output_df$citizen==1,])
summary(e)

e1 <- felm(female ~ fem_pres + fem_att + female_literacy + 
	pscore2011 | 
	DIST | 0 | DIST, 
	data=output_df[output_df$citizen==1,])
summary(e1)

f <- felm(female ~ fem_pres + fem_att + female_literacy | 
	DIST | 0 | DIST, 
	data=output_df[output_df$admin==1,])
summary(f)

f1 <- felm(female ~ fem_pres + fem_att + female_literacy + 
	pscore2011 | 
	DIST | 0 | DIST, 
	data=output_df[output_df$admin==1,])
summary(f1)

g <- felm(female ~ fem_pres + fem_att + female_literacy | 
	DIST | 0 | DIST, 
	data=output_df[output_df$pol==1,])
summary(g)

g1 <- felm(female ~ fem_pres + fem_att + female_literacy + 
	pscore2011 | 
	DIST | 0 | DIST, 
	data=output_df[output_df$pol==1,])
summary(g1)

## Fake models for stargazer
fake1e <- lm(female ~ fem_pres + fem_att + female_literacy - 1, 
	data=output_df[output_df$citizen==1,])
fake2e <- lm(female ~ fem_pres + fem_att + female_literacy + pscore2011 - 1, 
	data=output_df[output_df$citizen==1,])

fake1f <- lm(female ~ fem_pres + fem_att + female_literacy - 1, 
	data=output_df[output_df$admin==1,])
fake2f <- lm(female ~ fem_pres + fem_att + female_literacy + pscore2011 - 1, 
	data=output_df[output_df$admin==1,])

fake1g <- lm(female ~ fem_pres + fem_att + female_literacy - 1, 
	data=output_df[output_df$pol==1,])
fake2g <- lm(female ~ fem_pres + fem_att + female_literacy + pscore2011 - 1, 
	data=output_df[output_df$pol==1,])


## Output to LaTeX
fake_m <- list(fake1e, fake2e, fake1f, fake2f, fake1g, fake2g)

include_models <- list(e, e1, f, f1, g, g1)

coef_list <- lapply(include_models, function(x) x$coefficients)
se_list <- lapply(include_models, function(x) x$cse)

addtl_line <- c("District FE", rep(c("$\\checkmark$"), 6))
addtl_line2 <- c("Backwardness Score Control", rep(c("", "$\\checkmark$"), 3))

stargazer(fake_m,
    coef = coef_list,
    se = se_list,
    dep.var.labels="Female Speech",
    column.labels = c("Citizens", "Citizens", "Admin.",  "Admin.",  "Politicians",  "Politicians"),
    covariate.labels = c("Female President", "Female Attendance", "Female Literacy"),
    keep.stat = "n",
    omit=c("pscore2011"),
    star.char = c("*", "**", "***"),
    p.auto = TRUE,
    star.cutoffs = c(.1, 0.05, 0.03),
    add.lines = list(addtl_line, addtl_line2),
    notes = "\\parbox[t]{10cm}{{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01. \\ Robust Standard Errors, clustered at the district, in parenthesis. The Backwarndess Score is an measure of village level development, calculated using demographic and infrastructal variables, including the share of population belonging to the Scheduled Castes and Tribes, as well as indicators for the presence of a primary or secondary school, hospital or medical clinic, and bank.}}",
    notes.append = FALSE,
    font.size="scriptsize",
    notes.align = "l",
    table.placement = "ht!",
    digits = 2,
    label = "Speech_by_Gender",
    title = "Frequency of Female Speech",
    out = "speechByGender.tex")

#####################################################################
#####################################################################
## APPENDIX TABLE D2
#####################################################################
#####################################################################

j <- felm(female ~ fem_pres + fem_att + female_literacy | 
	DIST | 0 | DIST, 
	data=output_df[output_df$pol==1 & output_df$position != "president",])
summary(j)

j1 <- felm(female ~ fem_pres + fem_att + female_literacy + 
	pscore2011 | 
	DIST | 0 | DIST, 
		data=output_df[output_df$pol==1 & output_df$position != "president",])
summary(j1)

fake1j <- lm(female ~ fem_pres + fem_att + female_literacy - 1, 
	data=output_df[output_df$pol==1,])
fake2j <- lm(female ~ fem_pres + fem_att + female_literacy + pscore2011 - 1, 
	data=output_df[output_df$pol==1,])


## Output to LaTeX
fake_m <- list(fake1j, fake2j)

include_models <- list(j, j1)

coef_list <- lapply(include_models, function(x) x$coefficients)
se_list <- lapply(include_models, function(x) x$cse)

addtl_line <- c("District FE", rep(c("$\\checkmark$"), 2))
addtl_line2 <- c("Backwardness Score Control", rep(c("", "$\\checkmark$"), 1))


stargazer(fake_m,
    coef = coef_list,
    se = se_list,
    dep.var.labels="Female Speech",
    column.labels = c("Politicians",  "Politicians"),
    covariate.labels = c("Female President", "Female Attendance", "Female Literacy"),
    keep.stat = "n",
    omit=c("pscore2011"),
    star.char = c("*", "**", "***"),
    p.auto = TRUE,
    star.cutoffs = c(.1, 0.05, 0.03),
    add.lines = list(addtl_line, addtl_line2),
    notes = "\\parbox[t]{5cm}{{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01. \\ Robust Standard Errors, clustered at the district, in parenthesis. The Backwarndess Score is an measure of village level development, calculated using demographic and infrastructal variables, including the share of population belonging to the Scheduled Castes and Tribes, as well as indicators for the presence of a primary or secondary school, hospital or medical clinic, and bank.}}",
    notes.append = FALSE,
    font.size="scriptsize",
    notes.align = "l",
    table.placement = "ht!",
    digits = 2,
    label = "Speech_by_Gender_PolOnly",
    title = "Frequency of Female Speech among Politicians",
    out = "speechByGender_PolOnly.tex")

names(output_df)
unique(output_df$DIST)

#######################################################################
#######################################################################
## TABLE 6
#######################################################################
#######################################################################
a <- t.test(speechlength ~ female, data=df_c)
b <- t.test(speechlength ~ female, data=df_c[df_c$citizen==1,])
c <- t.test(speechlength ~ female, data=df_c[df_c$admin==1,])
d <- t.test(speechlength ~ female, data=df_c[df_c$pol==1,])


t_df <- t(matrix(c(a$estimate, a$statistic, a$p.value,
	b$estimate, b$statistic, b$p.value,
	c$estimate, c$statistic, c$p.value, 
	d$estimate, d$statistic, d$p.value), nrow=4))

rownames(t_df) <- c("All Speakers", "Citizens Only", 
	"Administrators Only", 
	"Politicians Only")
colnames(t_df) <- c("Mean, Male Speeches", "Mean, Female Speeches", "t-statistic", "p value")

cat(print(xtable(t_df, align="lcccc", digits=4, 
		caption="Length of Speeches, by Gender",
	    #floating="FALSE", latex.environments=NULL,
		include.rownames=T, label="speechlength_gender"), 
		size = "footnotesize", caption.placement="top"), 
	file="speechlength_gender.tex")


#######################################################################
#######################################################################
## TABLE 7
#######################################################################
#######################################################################

df_c2 <- df_c %>% 
	group_by(GP, female, position_new) %>%
	summarise(floortime = sum(speechlength, na.rm=T))

df_c2 <- dcast(df_c2, GP ~ female + position_new, value.var="floortime")
df_c2$male_all <- rowSums(df_c2[,c(2:4)], na.rm=T)
df_c2$female_all <- rowSums(df_c2[,c(5:7)], na.rm=T)

head(df_c2)
colnames(df_c2) <- c("GP","male_cit", "male_admin", "male_pol", "fem_cit", "fem_admin", "fem_pol", 
	"male_all", "fem_all")

summary(df_c2$male_pol)
summary(df_c2$fem_pol)

a <- t.test(df_c2$male_all, df_c2$fem_all, na.rm=T)
b <- t.test(df_c2$male_cit, df_c2$fem_cit, na.rm=T)
c <- t.test(df_c2$male_admin, df_c2$fem_admin, na.rm=T)
d <- t.test(df_c2$male_pol, df_c2$fem_pol, na.rm=T)



t_df <- t(matrix(c(a$estimate, a$statistic, a$p.value,
	b$estimate, b$statistic, b$p.value,
	c$estimate, c$statistic, c$p.value, 
	d$estimate, d$statistic, d$p.value), nrow=4))

rownames(t_df) <- c("All Speakers", "Citizens Only", 
	"Administrators Only", 
	"Politicians Only")
colnames(t_df) <- c("Mean, Male Floortime", 
	"Mean, Female Floortime", "t-statistic", "p value")

cat(print(xtable(t_df, align="lcccc", digits=4, 
		caption="Assembly Floortime, by Gender",
	    #floating="FALSE", latex.environments=NULL,
		include.rownames=T, label="floortime_gender"), 
		size = "footnotesize", caption.placement="top"), 
	file="floortime_gender.tex")


#######################################################################
#######################################################################
## TABLE 8
#######################################################################
#######################################################################
## Does the next person continue to discuss your issue?
a <- summarise(group_by(output_df, female, citizen),
	next1same = mean(next1_same, na.rm=T),
	length_same = mean(length_same, na.rm=T),
	sum5_same = mean(sum5_same, na.rm=T)
	)
a <- t(a)	
colnames(a) <- c("Male Pol/Admin", "Male Citizen", "Female Pol/Admin", "Female Citizen")
a <- a[c(3:5),]
a

## Citizens are more likley to set the agenda than officials; the gram sabha really is a place for citizen discourse
a <- t.test(next1_same ~ citizen, output_df)
b <- t.test(prop5_same ~ citizen, output_df)
c <- t.test(length_same ~ citizen, output_df)

t_df <- t(matrix(c(a$estimate, a$statistic, a$p.value,
	b$estimate, b$statistic, b$p.value,
	c$estimate, c$statistic, c$p.value), nrow=4))

rownames(t_df) <- c("Next Topic Same", 
	"Perc. Same (Next 5 speeches)", "Length Topic")
colnames(t_df) <- c("Mean, Officials", 
	"Mean, Citizens", "t-statistic", "p value")

cat(print(xtable(t_df, align="lcccc", digits=4, 
		caption="Agenda Power by Position (All Speeches)",
	    #floating="FALSE", latex.environments=NULL,
		include.rownames=T, label="agenda_position"), 
		size = "footnotesize", caption.placement="top"), 
	file="agenda_position.tex")

#######################################################################
#######################################################################
## TABLE 9
#######################################################################
#######################################################################
## Among only new topics
a <- t.test(next1_same ~ citizen, output_df[output_df$new_topic==1,])
b <- t.test(prop5_same ~ citizen, output_df[output_df$new_topic==1,])
c <- t.test(length_same ~ citizen, output_df[output_df$new_topic==1,])

t_df <- t(matrix(c(a$estimate, a$statistic, a$p.value,
	b$estimate, b$statistic, b$p.value,
	c$estimate, c$statistic, c$p.value), nrow=4))

rownames(t_df) <- c("Next Topic Same", 
	"Perc. Same (Next 5 speeches)", "Length Topic")
colnames(t_df) <- c("Mean, Officials", 
	"Mean, Citizens", "t-statistic", "p value")

cat(print(xtable(t_df, align="lcccc", digits=4, 
		caption="Agenda Power by Position (New Topics Only)",
	    #floating="FALSE", latex.environments=NULL,
		include.rownames=T, label="agenda_new_position"), 
		size = "footnotesize", caption.placement="top"), 
	file="agenda_new_position.tex")

#######################################################################
#######################################################################
## FIGURE 4
#######################################################################
#######################################################################

## Women are slighly less likely to have others continue talking about issues they raise
a <- t.test(next1_same ~ female, output_df)
b <- t.test(prop5_same ~ female, output_df)
c <- t.test(length_same ~ female, output_df)

## Women are slighly less likely to have others continue talking about issues they raise
a <- t.test(next1_same ~ female, output_df[output_df$citizen==1,])
b <- t.test(prop5_same ~ female, output_df[output_df$citizen==1,])
c <- t.test(length_same ~ female, output_df[output_df$citizen==1,])


t_df <- t(matrix(c(a$estimate, a$statistic, a$p.value,
	b$estimate, b$statistic, b$p.value,
	c$estimate, c$statistic, c$p.value), nrow=4))

rownames(t_df) <- c("Next Topic Same", 
	"Perc. Same (Next 5 speeches)", "Length Topic")
colnames(t_df) <- c("Mean, Male Speakers", 
	"Mean, Female Speakers", "t-statistic", "p value")

cat(print(xtable(t_df, align="lcccc", digits=4, 
		caption="Agenda Power by Gender (Citizen Speeches)",
	    #floating="FALSE", latex.environments=NULL,
		include.rownames=T, label="agenda_gender"), 
		size = "footnotesize", caption.placement="top"), 
	file="agenda_gender.tex")


nonproforma <- c(12, 9, 10, 6, 15, 11, 2, 13, 8, 7, 5, 14)
prep_model2 <- estimateEffect(nonproforma ~ citizen*female, cont_only15, metadata=GScont.meta, 
	uncertainty="None")

png("genderCitizen_control.png", 
	units="in", width=7, height=5, res=240)
par(mfrow=c(1,1))
a <- plot.estimateEffect(prep_model2, "female", model=cont_only15, 
	method="difference", labeltype="custom",
	moderator="citizen", moderator.value=1,
	cov.value1=1, cov.value2=0, 
	xlab="Male                              Female",
	xlim=c(-0.06, 0.06), width=100,
	ci.level=0.95, 
	verbose.labels=F, custom.labels=topic_titles[nonproforma])
dev.off()


#######################################################################
#######################################################################
## TABLE 10
#######################################################################
#######################################################################

e <- felm(next1_same ~ female*citizen + pscore2011 + fem_pres | 
	DIST | 0 | DIST, 
	data=output_df)
summary(e)

e1 <- felm(next1_same ~ female*citizen + pscore2011 + fem_pres | 
	DIST + factor(max_topic) | 0 | DIST, data=output_df)
summary(e1)

f <- felm(prop5_same ~ female*citizen + pscore2011  + fem_pres | 
	DIST  | 0 | DIST, 
	data=output_df)
summary(f)

f1 <- felm(prop5_same ~ female*citizen + pscore2011  + fem_pres | 
	DIST + factor(max_topic) | 0 | DIST, data=output_df)
summary(f1)

g <- felm(length_same ~ female*citizen + pscore2011  + fem_pres | 
	DIST  | 0 | DIST, 
	data=output_df)
summary(g)

g1 <- felm(length_same ~ female*citizen + pscore2011  + fem_pres | 
	DIST + factor(max_topic)| 0 | DIST, data=output_df)
summary(g1)


## Fake models for stargazer
fake1e <- lm(next1_same ~ female*citizen  + pscore2011 + fem_pres - 1, 
	data=output_df)
fake2e <- lm(next1_same ~ female*citizen  + pscore2011+ fem_pres  - 1, 
	data=output_df)

fake1f <- lm(prop5_same ~ female*citizen + pscore2011 + fem_pres - 1, 
	data=output_df)
fake2f <- lm(prop5_same ~ female*citizen + pscore2011 + fem_pres - 1, 
	data=output_df)

fake1g <- lm(length_same ~ female*citizen + pscore2011 + fem_pres - 1, 
	data=output_df)
fake2g <- lm(length_same ~ female*citizen + pscore2011 + fem_pres - 1, 
	data=output_df)


## Output to LaTeX
fake_m <- list(fake1e, fake2e, fake1f, fake2f, fake1g, fake2g)

include_models <- list(e, e1, f, f1, g, g1)

coef_list <- lapply(include_models, function(x) x$coefficients)
se_list <- lapply(include_models, function(x) x$cse)

addtl_line <- c("District FE", rep(c("$\\checkmark$"), 6))
addtl_line1 <- c("Backwardness Score Control", rep(c("$\\checkmark$"), 6))
addtl_line3 <- c("Female President Control", rep(c("$\\checkmark$"), 6))
addtl_line2 <- c("Topic FE", 
	rep(c("", "$\\checkmark$"), 3))

stargazer(fake_m,
    coef = coef_list,
    se = se_list,
    dep.var.labels=c("Next Same", "\\% Next 5 Same", "Length Topic"),
    covariate.labels = c("Female", "Citizen",
    	"Female x Citizen"),
    keep.stat = "n",
    omit=c("pscore2011", "fem_pres"),
    star.char = c("*", "**", "***"),
    p.auto = TRUE,
    star.cutoffs = c(.1, 0.05, 0.03),
    add.lines = list(addtl_line, addtl_line1, addtl_line2, addtl_line3),
    notes = "\\parbox[t]{8cm}{{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01. \\ Robust Standard Errors, clustered at the district, in parenthesis. The Backwarndess Score is an measure of village level development, calculated using demographic and infrastructal variables, including the share of population belonging to the Scheduled Castes and Tribes, as well as indicators for the presence of a primary or secondary school, hospital or medical clinic, and bank.}}",
    notes.append = FALSE,
    font.size="scriptsize",
    notes.align = "l",
    table.placement = "ht!",
    digits = 2,
    label = "agendaGenderPosition",
    title = "Agenda Setting Power, by Gender and Position",
    out = "agendaByGenderPoisition.tex")

#######################################################################
#######################################################################
## FIGURE 3
#######################################################################
#######################################################################

#install.packages("interplot")
e1_lm <- lm(next1_same ~ female*citizen + pscore2011 + fem_pres + 
	factor(DIST), data=output_df)

e2_lm <- lm(prop5_same ~ female*citizen + pscore2011 + fem_pres + factor(DIST) + factor(max_topic), data=output_df)

e3_lm <- lm(length_same ~ female*citizen + pscore2011 + fem_pres + factor(DIST) + factor(max_topic), data=output_df)

require("interplot")
png(file="next1_femcit.png", 
	res=240, units="in", height=4, width=4)
interplot(m=e1_lm, var1="female", var2="citizen", ci=0.90) + 
	xlab("Citizen") + ylab("Coefficient for I(Female)") + theme_bw() +
	geom_hline(yintercept=0, linetype="dashed", color="red")
dev.off()

png(file="lengthsame_femcit.png", 
	res=240, units="in", height=4, width=4)
interplot(m=e3_lm, var1="female", var2="citizen", ci=0.9) + 
	xlab("Citizen") + ylab("Coefficient for I(Female)") + theme_bw() +
	geom_hline(yintercept=0, linetype="dashed", color="red")
dev.off() 


#######################################################################
#######################################################################
## APPENDIX TABLE D.1
#######################################################################
#######################################################################
e <- felm(next1_same ~ female*fem_pres + pscore2011  | 
	DIST | 0 | DIST, 
	data=output_df[output_df$citizen==1,])
summary(e)

e1 <- felm(next1_same ~ female*fem_pres + pscore2011 | 
	DIST + factor(max_topic) | 0 | DIST, data=output_df[output_df$citizen==1,])
summary(e1)

f <- felm(prop5_same ~ female*fem_pres + pscore2011 | 
	DIST  | 0 | DIST, 
	data=output_df[output_df$citizen==1,])
summary(f)

f1 <- felm(prop5_same ~ female*fem_pres + pscore2011 | 
	DIST + factor(max_topic) | 0 | DIST, data=output_df[output_df$citizen==1,])
summary(f1)

g <- felm(length_same ~ female*fem_pres + pscore2011  | 
	DIST  | 0 | DIST, 
	data=output_df[output_df$citizen==1,])
summary(g)

g1 <- felm(length_same ~ female*fem_pres + pscore2011 | 
	DIST + factor(max_topic)| 0 | DIST, data=output_df[output_df$citizen==1,])
summary(g1)


## Fake models for stargazer
fake1e <- lm(next1_same ~ female*fem_pres  + pscore2011- 1, 
	data=output_df[output_df$citizen==1,])
fake2e <- lm(next1_same ~ female*fem_pres  + pscore2011 - 1, 
	data=output_df[output_df$citizen==1,])

fake1f <- lm(prop5_same ~ female*fem_pres + pscore2011 - 1, 
	data=output_df[output_df$citizen==1,])
fake2f <- lm(prop5_same ~ female*fem_pres + pscore2011 - 1, 
	data=output_df[output_df$citizen==1,])

fake1g <- lm(length_same ~ female*fem_pres + pscore2011 - 1, 
	data=output_df[output_df$citizen==1,])
fake2g <- lm(length_same ~ female*fem_pres + pscore2011 - 1, 
	data=output_df[output_df$citizen==1,])


## Output to LaTeX
fake_m <- list(fake1e, fake2e, fake1f, fake2f, fake1g, fake2g)

include_models <- list(e, e1, f, f1, g, g1)

coef_list <- lapply(include_models, function(x) x$coefficients)
se_list <- lapply(include_models, function(x) x$cse)

addtl_line <- c("District FE", rep(c("$\\checkmark$"), 6))
addtl_line1 <- c("Backwardness Score Control", rep(c("$\\checkmark$"), 6))
addtl_line2 <- c("Topic FE", 
	rep(c("", "$\\checkmark$"), 3))

stargazer(fake_m,
    coef = coef_list,
    se = se_list,
    dep.var.labels=c("Next Same", "\\% Next 5 Same", "Length Topic"),
    covariate.labels = c("Female", "Fem. Pres.", "Female x Fem. Pres."),
    keep.stat = "n",
    omit=c("pscore2011"),
    star.char = c("*", "**", "***"),
    p.auto = TRUE,
    star.cutoffs = c(.1, 0.05, 0.03),
    add.lines = list(addtl_line, addtl_line1, addtl_line2),
    notes = "\\parbox[t]{8cm}{{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01. \\ Sample include only speeches delivered by citizens (all administrator and politician speech is excluded). Robust Standard Errors, clustered at the district, in parenthesis. The Backwarndess Score is an measure of village level development, calculated using demographic and infrastructal variables, including the share of population belonging to the Scheduled Castes and Tribes, as well as indicators for the presence of a primary or secondary school, hospital or medical clinic, and bank.}}",
    notes.append = FALSE,
    font.size="scriptsize",
    notes.align = "l",
    table.placement = "ht!",
    digits = 2,
    label = "agendaGenderPres",
    title = "Agenda Setting Power, by Gender of Speaker and Gender of President",
    out = "agendaByGenderPres.tex")



#######################################################################
#######################################################################
## FIGURE 5
#######################################################################
#######################################################################

#install.packages("interplot")
e1_lm <- lm(next1_same ~ female*fem_pres + pscore2011 + factor(DIST) + factor(max_topic), 
	data=output_df[output_df$citizen==1,])


e2_lm <- lm(prop5_same ~ female*fem_pres + pscore2011 + factor(DIST) + factor(max_topic), 
	data=output_df[output_df$citizen==1,])


e3_lm <- lm(length_same ~ female*fem_pres + pscore2011 + factor(DIST) + factor(max_topic), 
	data=output_df[output_df$citizen==1,])


png(file="next1_fempres.png", 
	res=240, units="in", height=4, width=4)
interplot(m=e1_lm, var1="female", var2="fem_pres", ci=0.95) + 
	xlab("Female President") + ylab("Coefficient for I(Female)") + theme_bw() +
	geom_hline(yintercept=0, linetype="dashed", color="red")
dev.off()

png(file="lengthsame_fempres.png", 
	res=240, units="in", height=4, width=4)
interplot(m=e3_lm, var1="female", var2="fem_pres", ci=0.95) + 
	xlab("Female President") + ylab("Coefficient for I(Female)") + theme_bw() +
	geom_hline(yintercept=0, linetype="dashed", color="red")
dev.off() 



#######################################################################
#######################################################################
## TABLE 11
#######################################################################
#######################################################################

## Men and women are equally likely to receive an official response to their issue
a <- t.test(official_response ~ female, output_df[output_df$citizen==1,])

## BUT Women are significantly less likely to have an official respond on topic
b <- t.test(official_response_same ~ female, output_df[output_df$citizen==1,])

## Driven largely by the apathy of elected officials, not administrators
c <- t.test(elected_response_same ~ female, output_df[output_df$citizen==1,])
d <- t.test(admin_response_same ~ female, output_df[output_df$citizen==1,])


## Output t.tests regarding official responsiveness 
t_df <- t(matrix(c(a$estimate, a$statistic, a$p.value,
	b$estimate, b$statistic, b$p.value,
	c$estimate, c$statistic, c$p.value, 
	d$estimate, d$statistic, d$p.value), nrow=4))

rownames(t_df) <- c("Any Official Response", "On Topic Official Response (All)", 
	"On Topic Politician Response", 
	"On Topic Administrator Response")
colnames(t_df) <- c("Mean, Male Citizens", "Mean, Female Citizens", "t-statistic", "p value")

cat(print(xtable(t_df, align="lcccc", digits=4, 
		label = "official_response_gender", 
		caption = "Likelihood of Official Response, by Gender"),
		size = "footnotesize", 
		include.rownames=T, caption.placement="top"), 
	file="official_response_gender.tex")




#######################################################################
#######################################################################
## TABLE 12
#######################################################################
#######################################################################
## But female presidents are much more responsive to their female constituents
table(df_fs[df_fs$projectstatus == "Non-Project",]$fem_pres)

e <- felm(elected_response_same ~ female + new_topic + pscore2011 | 
	DIST | 0 | DIST, 
	data=output_df[output_df$citizen==1,])
summary(e)

e1 <- felm(elected_response_same ~ female*fem_pres + new_topic + pscore2011 | 
	DIST | 0 | DIST, 
	data=output_df[output_df$citizen==1,])
summary(e1)


e2 <- felm(elected_response_same ~ female*fem_pres + new_topic + pscore2011 | 
	DIST + factor(max_topic) | 0 | DIST, 
	data=output_df[output_df$citizen==1,])
summary(e2)


f <- felm(admin_response_same ~ female + new_topic + pscore2011 | 
	DIST | 0 | DIST, 
	data=output_df[output_df$citizen==1,])
summary(f)

f1 <- felm(admin_response_same ~ female*fem_pres + new_topic + pscore2011 | 
	DIST | 0 | DIST, 
	data=output_df[output_df$citizen==1,])
summary(f1)

f2 <- felm(admin_response_same ~ female*fem_pres + new_topic + pscore2011 | 
	DIST + factor(max_topic) | 0 | DIST, 
	data=output_df[output_df$citizen==1,])
summary(f2)


## Fake models for stargazer
fake0e <- lm(elected_response_same ~ female + new_topic + pscore2011 - 1, 
	data = output_df[output_df$citizen==1,])

fake1e <- lm(elected_response_same ~ female*fem_pres + new_topic+ pscore2011 - 1, 
	data = output_df[output_df$citizen==1,])

fake2e <- lm(elected_response_same ~ female*fem_pres + new_topic + pscore2011  - 1, 
	data = output_df[output_df$citizen==1,])

fake0f <- lm(admin_response_same ~ female + new_topic + pscore2011 - 1, 
	data = output_df[output_df$citizen==1,])

fake1f <- lm(admin_response_same ~ female*fem_pres + new_topic + pscore2011 - 1, 
	data = output_df[output_df$citizen==1,])

fake2f <- lm(admin_response_same ~ female*fem_pres + new_topic + pscore2011 - 1, 
	data = output_df[output_df$citizen==1,])



## Output to LaTeX
fake_m <- list(fake0e, fake1e, fake2e, fake0f, fake1f, fake2f)

include_models <- list(e, e1, e2, f, f1, f2)

coef_list <- lapply(include_models, function(x) x$coefficients)
se_list <- lapply(include_models, function(x) x$cse)

addtl_line <- c("District FE", rep(c("$\\checkmark$"), 6))
addtl_line1 <- c("Backwardness Score Control", rep(c("$\\checkmark$"), 6))
addtl_line2 <- c("Topic FE", rep(c("", "","$\\checkmark$"), 2))

stargazer(fake_m,
    coef = coef_list,
    se = se_list,
    dep.var.labels=c("On Topic Politician Response", "On Topic Admin. Response"),
    covariate.labels = c("Female", "Female President", "New Topic", "Female x Female President"),
    keep.stat = "n",
    omit=c("pscore2011"),
    star.char = c("*", "**", "***"),
    p.auto = TRUE,
    star.cutoffs = c(.1, 0.05, 0.03),
    add.lines = list(addtl_line, addtl_line1,addtl_line2),
    notes = "\\parbox[t]{8cm}{{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01. \\ Robust Standard Errors, clustered at the district, in parenthesis. The Backwarndess Score is an measure of village level development, calculated using demographic and infrastructal variables, including the share of population belonging to the Scheduled Castes and Tribes, as well as indicators for the presence of a primary or secondary school, hospital or medical clinic, and bank.}}",
    notes.append = FALSE,
    font.size="scriptsize",
    notes.align = "l",
    table.placement = "ht!",
    digits = 2,
    label = "responseGenderPres",
    title = "Official Responsiveness, by Gender",
    out = "responseGenderPres.tex")

mean(output_df[output_df$citizen==1 & output_df$female==0,]$elected_response_same, na.rm=T)

mean(output_df[output_df$citizen==1 & output_df$female==0,]$admin_response_same, na.rm=T)
